#pragma once
#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#define USE_OMP_GET_TIME 0
#define USE_TBB_GET_TIME 0

#if USE_OMP_GET_TIME == 1
#include <omp.h>
#endif

#if USE_TBB_GET_TIME == 1
#include <tbb/tbb.h>
#endif

// 三种方式都没有区别好像，当并行运算时，计算的外圈时间对，但加权时间不对，以外圈的时间为准即可
// Timer::evaluate("build_add", [&, this]() {  });
/// timer
class Timer {
public:
    struct TimeRecorder {
        TimeRecorder() = default;
        TimeRecorder(const std::string& name, double time_usage) {
            func_name_ = name;
            time_usage_ms_.emplace_back(time_usage);
        }
        std::string func_name_;
        std::vector<double> time_usage_ms_;
    };

    enum TIME_MODE {
        TIME_MODE_NORMAL = 0,
        TIME_MODE_OPENMP = 1,
        TIME_MODE_TBB = 2,
    };

    template <class F>
    static void Evaluate(const std::string& func_name, F&& func, int time_mode = TIME_MODE_NORMAL) {
        // 提前检测一次，records_order_中顺序则为谁先调用谁靠前
        bool isExist = (records_.find(func_name) == records_.end());
        if (isExist) records_order_.push_back(func_name);

        double time_used = 0.0;
        if (time_mode == TIME_MODE_NORMAL) {
            auto t1 = std::chrono::high_resolution_clock::now();
            std::forward<F>(func)();
            auto t2 = std::chrono::high_resolution_clock::now();
            time_used = std::chrono::duration_cast<std::chrono::duration<double>>(t2 - t1).count() * 1000;
        } else if (time_mode == TIME_MODE_OPENMP) {
#if USE_OMP_GET_TIME == 1
            auto t1 = omp_get_wtime();
            std::forward<F>(func)();
            auto t2 = omp_get_wtime();
            time_used = (t2 - t1) * 1000.0;
#endif
        } else if (time_mode == TIME_MODE_TBB) {
#if USE_TBB_GET_TIME == 1
            auto t1 = tbb::tick_count::now();
            std::forward<F>(func)();
            auto t2 = tbb::tick_count::now();
            time_used = (t2 - t1).seconds() * 1000.0;
#endif
        }
        if (isExist)
            records_.insert({func_name, TimeRecorder(func_name, time_used)});
        else
            records_[func_name].time_usage_ms_.emplace_back(time_used);
        records_tmp_.emplace_back(func_name, time_used);
    }

    static void PrintRecentMsg() {
        std::cout << ">>> ===== Printing run time =====" << std::endl;
        for (auto item : records_tmp_) {
            std::cout << "> [ " << item.first << " ] time usage: " << item.second << " ms." << std::endl;
        }
        std::cout << ">>> ===== Printing run time end =====" << std::endl;
    }

    static void Reset() { records_tmp_.clear(); }

    static void PrintAll() {
        std::cout << ">>> ===== Printing run time =====";
        for (const auto& r : records_) {
            std::cout << "> [ " << r.first << " ] average time usage: "
                      << std::accumulate(r.second.time_usage_ms_.begin(), r.second.time_usage_ms_.end(), 0.0) / double(r.second.time_usage_ms_.size())
                      << " ms , called times: " << r.second.time_usage_ms_.size();
        }
        std::cout << ">>> ===== Printing run time end =====";
    }

    /// dump to a log file
    static void DumpIntoFile(const std::string& file_name) {
        std::ofstream ofs(file_name, std::ios::out);

        if (!ofs.is_open()) {
            std::cerr << "Failed to open file: " << file_name << "!!!" << std::endl;
            return;
        } else {
            std::cout << "Dump Time Records into file: " << file_name << std::endl;
        }

        ofs << ">>> ===== Printing run time =====" << std::endl;
        // 设置列宽
        const int nameWidth = 40;
        const int scoreWidth = 15;

        // 输出表头
        ofs << std::left << std::setw(nameWidth) << "Function" << std::setw(scoreWidth) << "Ave time"  // 平均运行时间
            << std::setw(scoreWidth) << "S.D time"                                                     // 运行时间标准差
            << std::setw(scoreWidth) << "Called times"                                                 // 调用次数
            << std::setw(scoreWidth) << "Normal time"  // 平均时间*相对主程序调用倍数->求得的时间
            << std::setw(scoreWidth) << "Real time"    // 考虑并行加速的真实物理时间
            << std::endl;
        // 输出分隔线
        ofs << std::setfill('-') << std::setw(nameWidth + 3 * scoreWidth) << "" << std::setfill(' ') << std::endl;

        // 获得最小计数，一般为主程序运行次数
        int min_count = 0;
        for (auto& r : records_) {
            if (!min_count) min_count = r.second.time_usage_ms_.size();
            if (r.second.time_usage_ms_.size() < min_count) min_count = r.second.time_usage_ms_.size();
        }
        // 遍历计算每一项
        //  map遍历顺序
        //  for (auto& r : records_) { &datas = r.second.data_vector_;
        //  vector 顺序
        for (auto& r : records_order_) {
            std::vector<double>& tim = records_[r].time_usage_ms_;
            // 时间均值
            double ave_time = std::accumulate(tim.begin(), tim.end(), 0.0) / double(tim.size());

            // 计算方差
            double variance = 0.0;
            for (const auto& num : tim) variance += (num - ave_time) * (num - ave_time);
            variance /= tim.size();

            ofs << std::fixed << std::setprecision(3) << std::left << std::setw(nameWidth) << r << std::setw(scoreWidth) << ave_time << std::setw(scoreWidth)
                << sqrt(variance) << std::setw(scoreWidth) << tim.size() << std::setw(scoreWidth) << ave_time * double(tim.size()) / double(min_count)
                << std::endl;
        }
        ofs << ">>> ===== Printing run time end =====" << std::endl;

        size_t max_length = 0;
        for (const auto& kv : records_) {
            ofs << std::setw(scoreWidth) << kv.first;
            max_length = std::max(max_length, kv.second.time_usage_ms_.size());
        }
        ofs << std::endl;

        for (size_t i = 0; i < max_length; ++i) {
            for (const auto& kv : records_) {
                const auto& iter = kv.second;
                if (i < iter.time_usage_ms_.size())
                    ofs << std::setw(scoreWidth) << iter.time_usage_ms_[i];
                else
                    ofs << std::setw(scoreWidth) << "-";
                ofs << std::setw(scoreWidth);
            }
            ofs << std::endl;
        }
        ofs.close();
    }

    /// get the average time usage of a function
    static double GetMeanTime(const std::string& func_name) {
        if (records_.find(func_name) == records_.end()) {
            return 0.0;
        }

        auto r = records_[func_name];
        return std::accumulate(r.time_usage_ms_.begin(), r.time_usage_ms_.end(), 0.0) / double(r.time_usage_ms_.size());
    }

    /// clean the records
    static void Clear() { records_.clear(); }

    // 将target_name移动到reference_name后面
    static void MoveRecord(const std::string& target_name, const std::string& reference_name) {
        auto target_it = std::find(records_order_.begin(), records_order_.end(), target_name);
        auto reference_it = std::find(records_order_.begin(), records_order_.end(), reference_name);
        if (target_it == records_order_.end() || reference_it == records_order_.end()) {
            std::cerr << "Target or Reference data not found " << std::endl;
            return;
        }
        records_order_.erase(target_it);
        auto insert_position = std::next(reference_it);
        records_order_.insert(insert_position, target_name);
    }

    //    private:
    static std::map<std::string, TimeRecorder> records_;
    static std::vector<std::string> records_order_;
    static std::vector<std::pair<std::string, double>> records_tmp_;
};

// std::map 有序，且是根据键值进行排序的，unordered_map 完全无序
std::map<std::string, Timer::TimeRecorder> Timer::records_;
std::vector<std::string> Timer::records_order_;
std::vector<std::pair<std::string, double>> Timer::records_tmp_;